# Session setup: remove every object from the calling environment and move to
# the replication archive so the relative "Data/" and "Output/" paths used
# below resolve.
# NOTE(review): both calls are machine-specific side effects; confirm the
# directory exists before running this script elsewhere.
rm(list=ls())
setwd("~/Downloads/dataverse_files(3)")
library(tidyverse)

# Return the most frequent value (the mode) of a vector.
#
# v        Vector to summarise.
# na.omit  If TRUE (default), missing values are dropped before counting.
#
# Returns a length-one vector. Ties are broken at random with sample(), so
# call set.seed() beforehand when reproducible output matters. When nothing
# is left to count (empty or all-NA input) a single NA of the input's type is
# returned instead of a zero-length vector, so summarise() callers do not
# fail on all-missing columns.
getmode <- function(v, na.omit = TRUE) {
  if (na.omit) {
    v <- stats::na.omit(v)
  }
  if (length(v) == 0) {
    # Indexing with NA yields a typed missing value (NULL stays NULL).
    return(v[NA_integer_])
  }
  uv <- unique(v)
  tab <- tabulate(match(v, uv))
  out <- uv[tab == max(tab)]
  if (length(out) > 1) {
    out <- sample(out, 1)
  }
  out
}
# Negated membership operator: `x %nin% y` is TRUE where x is not in y.
`%nin%` <- Negate(`%in%`)

# Input tables read from the Data/ directory.
hearings <- read_csv(file = "Data/CongressDocumentsCitations.csv")
doi_id <- read_csv(file = "Data/CongTTDOI.csv")

#clustering code preserved for transparency
# library(M3C)
# library(ClusterR)
# 
# cluster_papers <- function(data){
#   bsize = nrow(data)/3
#   bsize = min(c(1000,bsize))
#   
#   max_clust = min(nrow(data) - 2, 75)
#   
#   cluster_dat <- data %>% dplyr::select(contains("dim"))
#   if (nrow(cluster_dat) > 50){
#   pmk <- Optimal_Clusters_KMeans(as.matrix(cluster_dat), max_clusters = max_clust, verbose = T, criterion = "BIC",
#                                  mini_batch_params = list("batch_size" =  bsize, "init_fraction" = .1, "early_stop_iter" = 10), seed = 666)
#   
#   
#   
#   num_clust <- which(pmk == min(pmk))
#   
#   pmk_mini <- MiniBatchKmeans(
#     as.matrix(cluster_dat),
#     clusters = num_clust,
#     batch_size = bsize,
#     num_init = 1,
#     max_iters = 100,
#     init_fraction = 1,
#     initializer = "kmeans++",
#     early_stop_iter = 10,
#     verbose = T,
#     CENTROIDS = NULL,
#     tol = 1e-04,
#     tol_optimal_init = 0.3,
#     seed = 1
#   )
#   
#   cluster_assign <- predict_MBatchKMeans(as.matrix(cluster_dat), pmk_mini$centroids, fuzzy = FALSE)
#   
#   data$cluster <- cluster_assign
#   } else {
#     data$cluster <- NA
#   }
#   gc()
#   return(data)
#   
# }


# Load the pre-clustered data.
# NOTE(review): presumably this restores `committee_party_citation`, which is
# used below without being created in this script -- confirm.
load("Data/Committee_Party_Citation_Clusters.RData")

# Install StatMatch only when it is missing, so that sourcing the script does
# not reinstall the package (and hit the network) on every run.
if (!requireNamespace("StatMatch", quietly = TRUE)) {
  install.packages("StatMatch")
}
library(StatMatch)
library(boot)

# Compare how Democrat-cited and Republican-cited papers spread over clusters.
#
# data  Data frame with a `cluster` column and a `party_citation` column; the
#       labels counted are "D", "R" and "Both".
# perm  If TRUE, `party_citation` is shuffled first (permutation null).
#
# Returns NA when `data` has fewer than two distinct clusters or exactly one
# distinct party label; otherwise the result of comp.prop() on the per-cluster
# counts. Papers labelled "Both" are added to both parties' counts.
get_sim <- function(data, perm = FALSE) {
  if (perm == TRUE) {
    data$party_citation <- sample(data$party_citation)
  }

  # Guard clauses: nothing to compare.
  if (length(unique(data$cluster)) < 2 ||
      length(unique(data$party_citation)) == 1) {
    return(invisible(NA))
  }

  data$cluster <- as.factor(data$cluster)
  n_clusters <- length(unique(data$cluster))
  counts <- table(data$party_citation, data$cluster)

  # Per-cluster counts for one label, or zeros when the label never occurs.
  label_counts <- function(label) {
    if (label %in% rownames(counts)) {
      counts[label, ]
    } else {
      rep(0, n_clusters)
    }
  }

  both_counts <- label_counts("Both")
  dem_counts <- label_counts("D") + both_counts
  rep_counts <- label_counts("R") + both_counts

  comp.prop(
    dem_counts, rep_counts,
    n1 = nrow(filter(data, party_citation %in% c("D", "Both"))),
    n2 = nrow(filter(data, party_citation %in% c("R", "Both")))
  )
}
# Observed similarity per committee; committees for which get_sim() returned
# the bare NA placeholder are dropped before the `meas` vectors are stacked.
comm_similarities <- committee_party_citation %>% map(~get_sim(.))
comm_similarities <- comm_similarities %>% discard(~ identical(., NA))
comm_similarities_meas <- do.call(rbind, lapply(comm_similarities, function(i) i$meas)) %>% as_tibble(rownames = "Committee")

#permuted similarities
# 100 replicates with shuffled party labels. purrr::rerun() is deprecated, so
# the replicates are generated by mapping over a replicate index instead.
comm_similarities_perm <- map(seq_len(100), function(replicate_id) {
  map(committee_party_citation, ~get_sim(., perm = TRUE))
})

# Drop every element of `nested_list` whose contents are entirely NA (such as
# a bare NA placeholder); the remaining elements keep their names and order.
filter_inner_lists <- function(nested_list) {
  is_all_missing <- function(entry) isTRUE(all(is.na(entry)))
  Filter(Negate(is_all_missing), nested_list)
}

# Strip the all-NA entries from each permutation replicate.
comm_similarities_perm <- map(comm_similarities_perm, filter_inner_lists)

# Note: outer list elements left empty by the filtering above are not removed here.

# Gather the `meas` component of every named entry of `lst` into one tibble:
# `second_name` holds the entry name and `meas_data` is a list-column holding
# that entry's `meas`. Entries without a `meas` component contribute no rows.
extract_meas_data <- function(lst) {
  rows <- lapply(names(lst), function(entry_name) {
    entry <- lst[[entry_name]]
    if (!("meas" %in% names(entry))) {
      return(tibble())
    }
    tibble(second_name = entry_name, meas_data = list(entry$meas))
  })
  bind_rows(rows)
}

# Extract the 'meas' data
final_df <- map_dfr(comm_similarities_perm, ~extract_meas_data(.x))
# (A stray debugging line that piped `final_df$type` into the undefined
# function `uni` was removed here; it stopped the script when sourced.)
# Unnest the 'meas_data' column to create the final dataframe
final_df <- final_df %>%
  unnest_wider(meas_data)

# Long format: one row per committee x similarity measure.
final_df <- final_df %>% pivot_longer(cols = 2:5) %>% rename(Committee = second_name)
final_df$type <- "Permuted Null"

comm_similarities_meas <- comm_similarities_meas %>% pivot_longer(2:5)
comm_similarities_meas$type <- "Observed Value"

# Permuted-null distribution of the "overlap" measure as a slab, with the
# observed per-committee values as dots. stat_slabinterval() and stat_dots()
# come from ggdist, which this script never attaches, so they are
# namespace-qualified.
cong_sim_measure_overlap <- final_df %>%
  dplyr::filter(name == "overlap") %>%
  ggplot(aes(x = value, y = name, fill = type, color = type)) +
  ggdist::stat_slabinterval(slab_alpha = .5,
                            point_interval = "median_qi", .width = .75,
                            scale = 1,
                            normalize = "xy") +
  ggdist::stat_dots(data = dplyr::filter(comm_similarities_meas, name == "overlap"),
                    aes(x = value, y = name),
                    slab_alpha = .5,
                    binwidth = .01) +
  theme_classic() + ylab("") + xlab("Score")


ggsave("Output/FigS20G.pdf", cong_sim_measure_overlap, width = 9, height = 3)



library(cramer)
# Run a Cramer two-sample test between the embeddings of Democrat-cited and
# Republican-cited papers of one committee.
#
# data  Data frame with `Committee_short`, `party_citation` and the embedding
#       columns whose names contain "dim".
#
# Prints the committee identifier as a progress marker. Returns a list of NA
# fields (statistic, crit.value, n, m, p.value) when `data` has fewer than 50
# rows or fewer than two distinct party labels; otherwise the cramer.test()
# result. Papers labelled "Both" enter both samples.
calc_cramer <- function(data) {
  print(unique(data$Committee_short))

  skipped <- list("statistic" = NA,
                  "crit.value" = NA,
                  "n" = NA,
                  "m" = NA,
                  "p.value" = NA)
  if (nrow(data) < 50 || length(unique(data$party_citation)) < 2) {
    return(invisible(skipped))
  }

  dem_rows <- filter(data, party_citation %in% c("D", "Both"))
  rep_rows <- filter(data, party_citation %in% c("R", "Both"))
  cramer.test(
    x = as.matrix(dplyr::select(dem_rows, contains("dim"))),
    y = as.matrix(dplyr::select(rep_rows, contains("dim")))
  )
}

# Cramer test per committee, then pull the statistic, critical value and
# p-value out of each result into plain vectors.
embeddings_committee_cramer <- committee_party_citation %>% map(~calc_cramer(.))
cramer_statistic <- embeddings_committee_cramer %>% map(~.$statistic) %>% unlist()
cramer_crit <- embeddings_committee_cramer %>% map(~.$crit.value) %>% unlist()

cramer_pvalue <- embeddings_committee_cramer %>% map(~.$p.value) %>% unlist()


# One display name per committee code: the most frequent "chamber committee"
# string, with the doubled chamber prefix collapsed.
committee_names <- read_csv("Data/OvertonCommitteeNames_Standard.csv") %>% group_by(committee_short) %>% summarise(comm_name = getmode(paste(chamber, committee))) %>% 
    mutate(comm_name = str_replace_all(comm_name, "House House", "House"), 
    comm_name = str_replace_all(comm_name, "Senate Senate", "Senate"))

cramer_dists <- bind_cols(names(committee_party_citation), cramer_statistic, cramer_crit, cramer_pvalue)

names(cramer_dists) <- c("committee_short", "cramer_dist","crit.value", "p.value")

# committee_short is the only shared column; naming it keeps the join explicit.
cramer_dists <- cramer_dists %>% left_join(committee_names, by = "committee_short")
cramer_dists



cramer_dists <- cramer_dists %>% mutate(p.label = case_when(
  p.value <= .00005 ~ "****",
  p.value <= .0005 ~ "***", 
  p.value <= .005 ~ "**",
  p.value <= .05 ~ "*",
  p.value <= .1 ~ "+",
  TRUE ~ "ns"
))

cramer_dists <- cramer_dists %>% mutate(value_delta = cramer_dist - crit.value,  value_ratio = cramer_dist/crit.value)
cramer_dists <- cramer_dists %>% filter(!is.na(cramer_dist)) %>% arrange(value_ratio)
# seq_len() stays valid when no committee survives the filter (1:0 is c(1, 0)).
cramer_dists$ratio_rank <- seq_len(nrow(cramer_dists))

# ECDF of statistic / critical value. The first label is anchored at x = .9;
# the original `aes(x=,.9, ...)` typo left x unset, so the label fell back to
# the data-mapped x inherited from the plot.
cr_plot <- cramer_ratio_plot <- cramer_dists %>% ggplot(aes(x=value_ratio)) + 
  stat_ecdf(geom = "step", pad = FALSE) + 
  geom_vline(xintercept = .9, linetype = 3, color = "grey") + 
  geom_vline(xintercept = 1, linetype = 4) +
  geom_text(aes(x=.9, label="\nCritical value for p < .1", y=.5), colour="grey", angle=90,nudge_x = -.4, size = 3) +
  geom_text(aes(x=1, label="\nCritical value for p < .05\n", y=.5), colour="black", angle=90 , nudge_x = .2, size = 3) +
  theme_classic()  + xlab("Cramer Statistic/Critical Value") 

ggsave("Output/FigS20I.pdf",  cr_plot, width = 9, height = 3)



library(Rtsne)
library(randomcoloR)
library(DescTools)
library(ggfittext)
library(cowplot)
library(plotly)
library(htmlwidgets)
library(RColorBrewer)

# Journal, year and title for the cited papers whose DOI is listed in doi_id.
overtoncited <- filter(
  dplyr::select(read_csv("Data/OvertonCitedPapers.csv"), doi, journal.title, year, title),
  doi %in% doi_id$externalids
)


# Attach that metadata to every committee's citation table.
committee_party_citation <- map(
  committee_party_citation,
  function(cited) left_join(cited, overtoncited, by = c("dois_cited" = "doi"))
)

# Draw the t-SNE figures for one committee's cited papers and write them to
# Output/: an interactive plotly scatter (HTML) and a static PDF with the
# Democrat-cited panel, per-cluster counts, and the Republican-cited panel.
#
# data  One committee's citation data frame. Columns read here: cluster,
#       dois_cited, party_citation, Committee_short, journal.title, year,
#       title, and the embedding columns whose names contain "dim".
#
# Nothing is drawn unless `data` has more than 30 rows (NULL is returned
# invisibly in that case). The t-SNE layout and the colour palette are
# random; call set.seed() beforehand for a reproducible figure.
plot_comm_clusters <- function(data){
  if (is.null(data$cluster)) {
    data$cluster <- 1
  }
  if (nrow(data) > 30){
    cited_ideo <- data %>% dplyr::select(dois_cited, party_citation) %>% unique()
    # Collapse rows sharing the same values in columns 6:773, taking the mode
    # of every other column.
    # NOTE(review): the columns are addressed by position -- presumably the
    # embedding dimensions; confirm against the loaded data.
    data_unique <- data %>% group_by_at(6:773) %>% summarise_all(getmode) %>% ungroup() %>% left_join(cited_ideo)
    committee <- data$Committee_short %>% unique()
    tsne_val <- Rtsne(as.matrix(dplyr::select(data_unique, contains("dim"))))
    df <- bind_cols(as_tibble(tsne_val$Y), "party_citation" = data_unique$party_citation, "cluster" = data_unique$cluster,
                    "journal.title" = data_unique$journal.title, "year" = data_unique$year, "title" = data_unique$title)

    # One distinct colour per cluster, shared by the colour and fill scales.
    palette <- distinctColorPalette(length(unique(df$cluster)))
    print(length(palette))
    names(palette) <- levels(as.factor(df$cluster))
    colScale <- scale_colour_manual(name = "cluster", values = palette)
    fillScale <- scale_fill_manual(name = "cluster", values = palette)

    # p1 is only used to give both party panels identical axis ranges.
    p1 <- ggplot(df, aes(x=V1, y=V2, color = as.factor(party_citation))) + geom_point(alpha = .4, size = .5) +
      scale_color_manual(values=c("#666666", "#156B90","#9A3E25")) + theme_void() +
      ggtitle("All Cited Science") + theme(legend.position = "none")

    fig <- plot_ly(type = "scatter",
                   df, x = ~V1, y = ~V2,
                   # Hover text:
                   text = ~paste("Journal: ", journal.title, '<br>Title:', title, '<br>Year:', year),
                   color = ~party_citation,
                   colors = c("#666666", "#156B90","#9A3E25"),
                   marker = list(
                     opacity = 0.5)) %>% layout(title = paste("SPECTER embeddings of papers cited in", committee, "by Congressional committees"))

    saveWidget(fig, paste0("Output/FigS20A-C.html"), selfcontained = TRUE)
    df$cluster <- as.factor(df$cluster)

    p_d <- ggplot(dplyr::filter(df, party_citation %in% c("D", "Both")), aes(x=V1, y=V2, color = as.factor(cluster))) +
      geom_point(alpha = .4, size = .5) + colScale +
      theme_void() + theme(legend.position = "none") + ggtitle("Democrat Cited Science") +
      xlim(layer_scales(p1)$x$range$range) + ylim(layer_scales(p1)$y$range$range)

    dem_counts <- df %>% dplyr::filter(party_citation %in% c("D", "Both")) %>% group_by(cluster) %>% summarise(num = n()) %>% mutate(party = "Democrat\ncited")
    rep_counts <- df %>% dplyr::filter(party_citation %in% c("R", "Both")) %>% group_by(cluster) %>% summarise(num = n()) %>% mutate(party = "Republican\ncited")

    party_counts <- bind_rows(dem_counts, rep_counts) %>% mutate(cluster = as.factor(cluster))

    clust_bar <- party_counts %>% ggplot(aes(y = cluster, x=num, fill = cluster)) + geom_col() + theme_void() + facet_wrap(~party) + fillScale + theme_void() + theme(legend.position = "none")

    p_r <- ggplot(dplyr::filter(df, party_citation %in% c("R", "Both")), aes(x=V1, y=V2, color = as.factor(cluster))) +
      geom_point(alpha = .4, size = .5) + colScale +
      theme_void() + theme(legend.position = "none") + ggtitle("Republican Cited Science") +
      xlim(layer_scales(p1)$x$range$range) + ylim(layer_scales(p1)$y$range$range)

    title <- ggdraw() + draw_label(paste("SPECTER embeddings of papers cited in", committee, "documents by Congressional Committees"), fontface='bold')

    p <- cowplot::plot_grid(p_d, clust_bar, p_r, nrow = 1, rel_widths = c(1, .25, 1), scale = 0.9)
    p <- cowplot::plot_grid(title, p, ncol=1, rel_heights=c(0.1, 1)) # rel_heights values control title margins
    # Pass the assembled grid explicitly instead of relying on last_plot().
    ggsave("Output/Fig3_FigS20A-C.pdf", plot = p, device = "pdf", width = 10, height = 4, units = "in")
  }
}






# Draw and save the cluster figures for the committee stored under "hsif00".
plot_comm_clusters(committee_party_citation[["hsif00"]])





############### Think Tanks


# Start the think-tank analysis from a clean environment: every object created
# above is removed, so helpers such as getmode() are defined again below.
# Packages attached earlier stay attached (rm() only removes objects).
# NOTE(review): the working directory is machine-specific; confirm the path.
rm(list=ls())
setwd("~/Downloads/dataverse_files(3)")

library(tidyverse)

# Return the most frequent value (the mode) of a vector.
#
# v        Vector to summarise.
# na.omit  If TRUE (default), missing values are dropped before counting.
#
# Returns a length-one vector. Ties are broken at random with sample(), so
# call set.seed() beforehand when reproducible output matters. When nothing
# is left to count (empty or all-NA input) a single NA of the input's type is
# returned instead of a zero-length vector, so summarise() callers do not
# fail on all-missing columns.
getmode <- function(v, na.omit = TRUE) {
  if (na.omit) {
    v <- stats::na.omit(v)
  }
  if (length(v) == 0) {
    # Indexing with NA yields a typed missing value (NULL stays NULL).
    return(v[NA_integer_])
  }
  uv <- unique(v)
  tab <- tabulate(match(v, uv))
  out <- uv[tab == max(tab)]
  if (length(out) > 1) {
    out <- sample(out, 1)
  }
  out
}


# Input tables read from the Data/ directory.
ttdocs <- read_csv(file = "Data/ThinkTankDocumentsCitations.csv")



# The `...1` column is dropped from the DOI lookup table.
doi_id <- dplyr::select(read_csv(file = "Data/CongTTDOI.csv"), -`...1`)



library(ClusterR)
#clustering code preserved for transparency

# cluster_papers <- function(data){
#   bsize = nrow(data)/3
#   bsize = min(c(1000,bsize))
#   
#   max_clust = min(nrow(data) - 2, 75)
#   
#   cluster_dat <- data %>% dplyr::select(contains("dim"))
#   if (nrow(cluster_dat) > 50){
#     pmk <- Optimal_Clusters_KMeans(as.matrix(cluster_dat), max_clusters = max_clust, verbose = T, criterion = "BIC",
#                                    mini_batch_params = list("batch_size" =  bsize, "init_fraction" = .1, "early_stop_iter" = 10), seed = 666)
#     
#     
#     
#     num_clust <- which(pmk == min(pmk))
#     
#     pmk_mini <- MiniBatchKmeans(
#       as.matrix(cluster_dat),
#       clusters = num_clust,
#       batch_size = bsize,
#       num_init = 1,
#       max_iters = 100,
#       init_fraction = 1,
#       initializer = "kmeans++",
#       early_stop_iter = 10,
#       verbose = T,
#       CENTROIDS = NULL,
#       tol = 1e-04,
#       tol_optimal_init = 0.3,
#       seed = 1
#     )
#     
#     cluster_assign <- predict_MBatchKMeans(as.matrix(cluster_dat), pmk_mini$centroids, fuzzy = FALSE)
#     
#     data$cluster <- cluster_assign
#   } else {
#     data$cluster <- NA
#   }
#   gc()
#   return(data)
#   
# }

#load precalculated clustering
# NOTE(review): presumably this restores `tt_party_citation`, which is used
# below without being created in this script -- confirm.
load("Data/ThinkTankClassification_Citation_Clusters.RData")




# Compare how left-cited and right-cited papers spread over clusters.
#
# data  Data frame with a `cluster` column and a `party_citation` column; the
#       labels counted are "L", "R" and "Both".
# perm  If TRUE, `party_citation` is shuffled first (permutation null).
#
# Returns NA when `data` has fewer than two distinct clusters or exactly one
# distinct label; otherwise the result of comp.prop() on the per-cluster
# counts. Papers labelled "Both" are added to both sides' counts.
get_sim <- function(data, perm = F) {
  if (perm == TRUE) {
    data$party_citation <- sample(data$party_citation)
  }

  # Guard clauses: nothing to compare.
  if (length(unique(data$cluster)) < 2 ||
      length(unique(data$party_citation)) == 1) {
    return(invisible(NA))
  }

  data$cluster <- as.factor(data$cluster)
  n_clusters <- length(unique(data$cluster))
  counts <- table(data$party_citation, data$cluster)

  # Per-cluster counts for one label, or zeros when the label never occurs.
  label_counts <- function(label) {
    if (label %in% rownames(counts)) {
      counts[label, ]
    } else {
      rep(0, n_clusters)
    }
  }

  both_counts <- label_counts("Both")
  left_counts <- label_counts("L") + both_counts
  right_counts <- label_counts("R") + both_counts

  comp.prop(
    left_counts, right_counts,
    n1 = nrow(filter(data, party_citation %in% c("L", "Both"))),
    n2 = nrow(filter(data, party_citation %in% c("R", "Both")))
  )
}

# Observed similarity per issue; issues for which get_sim() returned the bare
# NA placeholder are dropped before the `meas` vectors are stacked.
tt_similarities <- tt_party_citation %>% map(~get_sim(.))
tt_similarities <- tt_similarities %>% discard(~ identical(., NA))
tt_similarities_meas <- do.call(rbind, lapply(tt_similarities, function(i) i$meas)) %>% as_tibble(rownames = "Issue")

#permuted similarities
# 100 replicates with shuffled labels. purrr::rerun() is deprecated, so the
# replicates are generated by mapping over a replicate index instead.
tt_similarities_perm <- map(seq_len(100), function(replicate_id) {
  map(tt_party_citation, ~get_sim(., perm = TRUE))
})

# Drop every element of `nested_list` whose contents are entirely NA (such as
# a bare NA placeholder); the remaining elements keep their names and order.
filter_inner_lists <- function(nested_list) {
  is_all_missing <- function(entry) isTRUE(all(is.na(entry)))
  Filter(Negate(is_all_missing), nested_list)
}

# Strip the all-NA entries from each permutation replicate.
tt_similarities_perm <- map(tt_similarities_perm, filter_inner_lists)

# Note: outer list elements left empty by the filtering above are not removed here.

# Gather the `meas` component of every named entry of `lst` into one tibble:
# `second_name` holds the entry name and `meas_data` is a list-column holding
# that entry's `meas`. Entries without a `meas` component contribute no rows.
extract_meas_data <- function(lst) {
  rows <- lapply(names(lst), function(entry_name) {
    entry <- lst[[entry_name]]
    if (!("meas" %in% names(entry))) {
      return(tibble())
    }
    tibble(second_name = entry_name, meas_data = list(entry$meas))
  })
  bind_rows(rows)
}

# Extract the 'meas' data
final_df <- map_dfr(tt_similarities_perm, ~extract_meas_data(.x))

# Unnest the 'meas_data' column to create the final dataframe
final_df <- final_df %>%
  unnest_wider(meas_data)


# Long format: one row per issue x similarity measure.
final_df <- final_df %>% pivot_longer(cols = 2:5) %>% rename(Issue = second_name)
final_df$type <- "Permuted Null"


tt_similarities_meas <- tt_similarities_meas %>% pivot_longer(2:5)
tt_similarities_meas$type <- "Observed Values"

# Permuted-null distribution of the "overlap" measure as a slab, with the
# observed per-issue values as dots. stat_slabinterval() and stat_dots() come
# from ggdist, which this script never attaches, so they are
# namespace-qualified.
tt_sim_measure_overlap <- final_df %>%
  dplyr::filter(name == "overlap") %>%
  ggplot(aes(x = value, y = name, fill = type, color = type)) +
  ggdist::stat_slabinterval(slab_alpha = .5,
                            point_interval = "median_qi", .width = .75,
                            scale = 1,
                            normalize = "xy") +
  ggdist::stat_dots(data = dplyr::filter(tt_similarities_meas, name == "overlap"),
                    aes(x = value, y = name),
                    slab_alpha = .5,
                    binwidth = .01) +
  theme_classic() + ylab("") + xlab("Score")


ggsave("Output/FigS22H.pdf", tt_sim_measure_overlap, width = 9, height = 3)





library(cramer)
# Run a Cramer two-sample test between the embeddings of left-cited and
# right-cited papers of one issue area.
#
# data  Data frame with `classifications`, `party_citation` and the embedding
#       columns whose names contain "dim".
#
# Prints the classification as a progress marker. Returns a list of NA fields
# (statistic, crit.value, n, m, p.value) when `data` has fewer than 50 rows or
# fewer than two distinct labels; otherwise the cramer.test() result. Each
# side is randomly down-sampled to 1000 rows when larger, and papers labelled
# "Both" enter both samples.
calc_cramer <- function(data) {
  print(unique(data$classifications))

  skipped <- list("statistic" = NA,
                  "crit.value" = NA,
                  "n" = NA,
                  "m" = NA,
                  "p.value" = NA)
  if (nrow(data) < 50 || length(unique(data$party_citation)) < 2) {
    return(invisible(skipped))
  }

  # The left side is sampled before the right, so the draws stay in order.
  left_dims <- dplyr::select(filter(data, party_citation %in% c("L", "Both")), contains("dim"))
  if (nrow(left_dims) > 1000) {
    left_dims <- sample_n(left_dims, 1000)
  }
  right_dims <- dplyr::select(filter(data, party_citation %in% c("R", "Both")), contains("dim"))
  if (nrow(right_dims) > 1000) {
    right_dims <- sample_n(right_dims, 1000)
  }

  cramer.test(x = as.matrix(left_dims), y = as.matrix(right_dims))
}

# Cramer test per issue, then pull the statistic, critical value and p-value
# out of each result into plain vectors.
embeddings_tt_cramer <- tt_party_citation %>% map(~calc_cramer(.))
cramer_statistic <- embeddings_tt_cramer %>% map(~.$statistic) %>% unlist()
cramer_crit <- embeddings_tt_cramer %>% map(~.$crit.value) %>% unlist()

cramer_pvalue <- embeddings_tt_cramer %>% map(~.$p.value) %>% unlist()

cramer_dists <- bind_cols(names(embeddings_tt_cramer), cramer_statistic, cramer_crit, cramer_pvalue)

names(cramer_dists) <- c("Issue", "cramer_dist","crit.value", "p.value")

cramer_dists$Issue <- fct_reorder(cramer_dists$Issue, cramer_dists$cramer_dist)

cramer_dists <- cramer_dists %>% mutate(p.label = case_when(
  p.value <= .00005 ~ "****",
  p.value <= .0005 ~ "***", 
  p.value <= .005 ~ "**",
  p.value <= .05 ~ "*",
  p.value <= .1 ~ "+",
  TRUE ~ "ns"
))

cramer_dists <- cramer_dists %>% mutate(value_delta = cramer_dist - crit.value,  value_ratio = cramer_dist/crit.value)
cramer_dists <- cramer_dists %>% filter(!is.na(cramer_dist)) %>% arrange(value_ratio)
# seq_len() stays valid when no issue survives the filter (1:0 is c(1, 0)).
cramer_dists$ratio_rank <- seq_len(nrow(cramer_dists))

# ECDF of statistic / critical value. The first label is anchored at x = .9;
# the original `aes(x=,.9, ...)` typo left x unset, so the label fell back to
# the data-mapped x inherited from the plot.
cr_plot <- cramer_ratio_plot <- cramer_dists %>% ggplot(aes(x=value_ratio)) + 
  stat_ecdf(geom = "step", pad = FALSE) + 
  geom_vline(xintercept = .9, linetype = 3, color = "grey") + 
  geom_vline(xintercept = 1, linetype = 4) +
  geom_text(aes(x=.9, label="\nCritical value for p < .1", y=.5), colour="grey", angle=90,nudge_x = -.4, size = 3) +
  geom_text(aes(x=1, label="\nCritical value for p < .05\n", y=.5), colour="black", angle=90 , nudge_x = .2, size = 3) +
  theme_classic()  + xlab("Cramer Statistic/Critical Value") 

ggsave("Output/FigS22J.pdf",  cr_plot, width = 9, height = 3)



library(Rtsne)
library(randomcoloR)
library(DescTools)
library(ggfittext)
library(cowplot)
library(plotly)
library(htmlwidgets)
library(RColorBrewer)
# DOI lookup table, with the `...1` column dropped.
doi_id <- dplyr::select(read_csv("Data/CongTTDOI.csv"), -`...1`)

# Journal, year and title for the cited papers whose DOI is listed in doi_id.
overtoncited <- filter(
  dplyr::select(read_csv("Data/OvertonCitedPapers.csv"), doi, journal.title, year, title),
  doi %in% doi_id$externalids
)


# Attach that metadata to every issue's citation table.
tt_party_citation <- map(
  tt_party_citation,
  function(cited) left_join(cited, overtoncited, by = c("dois_cited" = "doi"))
)


# Draw the t-SNE figures for the papers cited in one issue area by think tanks
# and write them to Output/: an interactive plotly scatter (HTML) and a static
# PDF with the left-cited panel, per-cluster counts, and the right-cited panel.
#
# data  One issue's citation data frame. Columns read here: cluster,
#       dois_cited, party_citation, classifications, journal.title, year,
#       title, and the embedding columns whose names contain "dim".
#
# Nothing is drawn unless `data` has more than 30 rows (NULL is returned
# invisibly in that case). The t-SNE layout and the colour palette are
# random; call set.seed() beforehand for a reproducible figure.
plot_tt_clusters <- function(data){
  if (is.null(data$cluster)) {
    data$cluster <- 1
  }
  if (nrow(data) > 30){
    cited_ideo <- data %>% dplyr::select(dois_cited, party_citation) %>% unique()
    # Collapse rows sharing the same values in columns 6:773, taking the mode
    # of every other column.
    # NOTE(review): the columns are addressed by position -- presumably the
    # embedding dimensions; confirm against the loaded data.
    data_unique <- data %>% group_by_at(6:773) %>% summarise_all(getmode) %>% ungroup() %>% left_join(cited_ideo)
    issue <- data$classifications %>% unique()
    tsne_val <- Rtsne(as.matrix(dplyr::select(data_unique, contains("dim"))))
    df <- bind_cols(as_tibble(tsne_val$Y), "party_citation" = data_unique$party_citation, "cluster" = data_unique$cluster,
                    "journal.title" = data_unique$journal.title, "year" = data_unique$year, "title" = data_unique$title)

    # One distinct colour per cluster, shared by the colour and fill scales.
    palette <- distinctColorPalette(length(unique(df$cluster)))
    print(length(palette))
    names(palette) <- levels(as.factor(df$cluster))
    colScale <- scale_colour_manual(name = "cluster", values = palette)
    fillScale <- scale_fill_manual(name = "cluster", values = palette)

    # p1 is only used to give both side panels identical axis ranges.
    p1 <- ggplot(df, aes(x=V1, y=V2, color = as.factor(party_citation))) + geom_point(alpha = .4, size = .5) +
      scale_color_manual(values=c("#666666", "#156B90","#9A3E25")) + theme_void() +
      ggtitle("All Cited Science") + theme(legend.position = "none")

    fig <- plot_ly(type = "scatter",
                   df, x = ~V1, y = ~V2,
                   # Hover text:
                   text = ~paste("Journal: ", journal.title, '<br>Title:', title, '<br>Year:', year),
                   color = ~party_citation,
                   colors = c("#666666", "#156B90","#9A3E25"),
                   marker = list(
                     opacity = 0.5)) %>% layout(title = paste("SPECTER embeddings of papers cited in", issue, "documents by ideological think tanks"))

    saveWidget(fig, paste0("Output/FigS20D-F.html"), selfcontained = TRUE)
    df$cluster <- as.factor(df$cluster)

    left_counts <- df %>% dplyr::filter(party_citation %in% c("L", "Both")) %>% group_by(cluster) %>% summarise(num = n()) %>% mutate(party = "left\ncited")
    right_counts <- df %>% dplyr::filter(party_citation %in% c("R", "Both")) %>% group_by(cluster) %>% summarise(num = n()) %>% mutate(party = "Right\ncited")

    party_counts <- bind_rows(left_counts, right_counts) %>% mutate(cluster = as.factor(cluster))

    clust_bar <- party_counts %>% ggplot(aes(y = cluster, x=num, fill = cluster)) + geom_col() + theme_void() + facet_wrap(~party) + fillScale + theme_void() + theme(legend.position = "none")

    p_d <- ggplot(dplyr::filter(df, party_citation %in% c("L", "Both")), aes(x=V1, y=V2, color = as.factor(cluster))) +
      geom_point(alpha = .4, size = .5) + colScale +
      theme_void() + theme(legend.position = "none") + ggtitle("Left Cited Science") +
      xlim(layer_scales(p1)$x$range$range) + ylim(layer_scales(p1)$y$range$range)

    p_r <- ggplot(dplyr::filter(df, party_citation %in% c("R", "Both")), aes(x=V1, y=V2, color = as.factor(cluster))) +
      geom_point(alpha = .4, size = .5) + colScale +
      theme_void() + theme(legend.position = "none") + ggtitle("Right Cited Science") +
      xlim(layer_scales(p1)$x$range$range) + ylim(layer_scales(p1)$y$range$range)

    title <- ggdraw() + draw_label(paste("SPECTER embeddings of papers cited in", issue, "documents by ideological think tanks"), fontface='bold')

    p <- cowplot::plot_grid(p_d, clust_bar, p_r, nrow = 1, rel_widths = c(1, .25, 1), scale = 0.9)
    p <- cowplot::plot_grid(title, p, ncol=1, rel_heights=c(0.1, 1)) # rel_heights values control title margins
    # Pass the assembled grid explicitly instead of relying on last_plot().
    ggsave("Output/FigS20D-F.pdf", plot = p, device = "pdf", width = 10, height = 4, units = "in")
  }
}


# Draw and save the cluster figures for the issue stored under "weather".
plot_tt_clusters(tt_party_citation[["weather"]])











